date.work <- "2025-04-10" 
dir.create(paste0("results/", date.work), recursive = TRUE)
results.folder <- paste0("results/", date.work)

`%!in%` = Negate(`%in%`)

library(tidyverse)
library(dplyr)
library(haven)
library(labelled)
library(splines)
library(brms)
library(survey)
library(lme4)
library(ggeffects)
library(data.table)
library(glmmML)
library(srvyr)
library(writexl)
library(cmdstanr)
library(gmodels)
library(Hmisc)

options(max.print = 10000)
options(survey.adjust.domain.lonely=TRUE)
options(survey.lonely.psu="adjust")

## appended data ----
Appended = read_stata("HPACC_Maindata_appended_20250407.dta") # 410492 w 77 variables 
setDT(Appended); nrow(Appended)

## survey_year ----
# convert survey_year to numeric
Appended[, survey_year_numeric:=as.numeric(factor(survey_year))] 

Appended[, survey_year_numeric2:=ifelse(survey_year_numeric==13, 14, survey_year_numeric)]

Appended[, `:=`(age_cat4=factor(case_when(age>=30 & age<40 ~1,
                                          age>=40 & age<50 ~2,
                                          age>=50 & age<60 ~3,
                                          age>=60 & age<70 ~4), 
                                levels=1:4, 
                                labels=c("30-39", "40-49", "50-59", "60-69")),
                age_cat3=factor(case_when(age>=40 & age<50 ~1,
                                          age>=50 & age<60 ~2,
                                          age>=60 & age<70 ~3), 
                                levels=1:3, 
                                labels=c("40-49", "50-59", "60-69")))]

Appended[, `:=`(age_cat8=factor(case_when(age>=30 & age<35 ~1,
                                          age>=35 & age<40 ~2,
                                          age>=40 & age<45 ~3,
                                          age>=45 & age<50 ~4,
                                          age>=50 & age<55 ~5,
                                          age>=55 & age<60 ~6,
                                          age>=60 & age<65 ~7,
                                          age>=65 & age<70 ~8), 
                                levels=1:8, 
                                labels=c("30-34", "35-39","40-44","45-49", "50-54","55-59",  "60-64",  "65-69")),
                age_cat6=factor(case_when(age>=40 & age<45 ~1,
                                          age>=45 & age<50 ~2,
                                          age>=50 & age<55 ~3,
                                          age>=55 & age<60 ~4,
                                          age>=60 & age<65 ~5,
                                          age>=65 & age<70 ~6), 
                                levels=1:6, 
                                labels=c("40-44","45-49", "50-54","55-59",  "60-64",  "65-69")))]

Appended[, bmi_cat6:=factor(case_when(bmi<18.5 ~1,
                                      bmi>=18.5 & bmi<25 ~2,
                                      bmi>=25 & bmi<30 ~3,
                                      bmi>=30 & bmi<35 ~4,
                                      bmi>=35 & bmi<40 ~5,
                                      bmi>=40 ~6), 
                            levels=1:6, 
                            labels=c("<18.5", "18.5-<25", "25-<30","30-<35","35-<40", ">=40"))]
names(Appended)

Appended1 <- copy(Appended)
Appended1 <- Appended1[!is.na(psu_num)] ;nrow(Appended1) 
Appended1 <- Appended1[!is.na(stratum_num)];nrow(Appended1)
Appended1 <- Appended1[,new_id:=1:.N]
save(Appended1, file="Appended1.rds")

## dm_diag----
# load("Appended1.rds")
dm <- Appended1[sample_dm_diag ==1];nrow(dm) # 33513
dm <- dm[!is.na(dm_diag)];nrow(dm) #33261
dm <- dm[!is.na(w3_who_30_69)] ;nrow(dm) 
dm <- dm[!is.na(w3_crude_30_69)] ;nrow(dm) 
dm <- dm[!is.na(psu_num)] ;nrow(dm) 
dm <- dm[!is.na(stratum_num)];nrow(dm) 
table(dm$dm_diag, exclude=NULL)
#     0     1 
# 13018 20243

# further delete
dm <- dm[!(is.na(bmi_cat4)|is.na(educat3))] ;nrow(dm) 
dm <- dm[!is.na(age)] ;nrow(dm) 
dm <- dm[!is.na(sex)] ;nrow(dm) 

dm <- dm[, `:=`(country_factor=factor(country),
                sex_factor=factor(sex),
                educat3_factor=factor(educat3),
                countryGDPclass_factor=factor(countryGDPclass),
                regioncat_factor=factor(ncdrisc_regioncat),
                dm_diag_factor=factor(dm_diag))]
dm <- dm %>% 
  droplevels

save(dm, file="dm.diag.rds") 

# model----
knot_interior_age <- quantile(dm$age, probs = c(0.275, 0.5, 0.725))
boundary_knots_age <- quantile(dm$age, probs = c(0.05, 0.95))
knot_interior_bmi <- quantile(dm$bmi , probs = c(0.275, 0.5, 0.725))
boundary_knots_bmi <- quantile(dm$bmi , probs = c(0.05, 0.95))

age_spline <- ns(dm$age, knots = knot_interior_age, Boundary.knots = boundary_knots_age)
bmi_spline <- ns(dm$bmi, knots = knot_interior_bmi, Boundary.knots = boundary_knots_bmi)

colnames(age_spline) <- paste0("age_s", 1:ncol(age_spline))
colnames(bmi_spline) <- paste0("bmi_s", 1:ncol(bmi_spline))

dm[, paste0("age_s", 1:ncol(age_spline)) := as.data.table(age_spline)]
dm[, paste0("bmi_s", 1:ncol(bmi_spline)) := as.data.table(bmi_spline)]

save(dm, file="dm.diag.rds") 

start.t <- Sys.time()  

fit.1 <- brm(dm_diag_factor ~ sex_factor+educat3_factor +age_cat4 + bmi_cat4 + countryGDPclass_factor +
                survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
              data = dm,
              family = bernoulli(link = "logit"),
              control = list(adapt_delta = 0.98, max_treedepth =15),
              iter = 1000, warmup = 500, chains = 4, cores = 4,
              seed=171511, backend = "cmdstanr")

fit.2 <- brm(dm_diag_factor ~ sex_factor+educat3_factor +age_cat4 + bmi_cat4 + countryGDPclass_factor +
               survey_year_numeric2 +
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr")

fit.3 <- brm(dm_diag_factor ~ sex_factor+educat3_factor +age_cat8 + bmi_cat6 + countryGDPclass_factor +
               survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr",
             save_pars = save_pars(all = TRUE))

fit.4 <- brm(dm_diag_factor ~ sex_factor+age_cat4 + educat3_factor +
               bmi_cat4 + countryGDPclass_factor + survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               regioncat_factor + regioncat_factor*bmi_cat4 +
               (1 | country_factor) + (-1 + countryGDPclass_factor | country_factor),
             data = subset(dm, sample_dm_diag==1),
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr",
             save_pars = save_pars(all = TRUE))

fit.5 <- brm(dm_diag_factor ~ sex_factor+educat3_factor +ns(age, df=4) + ns(bmi, df=4) + countryGDPclass_factor +
               survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr")

fit.6 <- brm(dm_diag_factor ~ sex_factor+educat3_factor +  age_s1 + age_s2 + age_s3 + age_s4 +  # adjust depending on how many columns were created
               bmi_s1 + bmi_s2 + bmi_s3 + bmi_s4 + countryGDPclass_factor +
               survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr")

end.t <- Sys.time()
take.time <- end.t-start.t 

save(fit.1, file="fit1_dm_diag.rds")
save(fit.2, file="fit2_dm_diag.rds")
save(fit.3, file="fit3_dm_diag.rds")
save(fit.4, file="fit4_dm_diag.rds")
save(fit.5, file="fit5_dm_diag.rds")
save(fit.6, file="fit6_dm_diag.rds")

# test loo----
library(loo)
loo1 <- loo(fit.1)
loo2 <- loo(fit.2)
loo3 <- loo(fit.3)
loo4 <- loo(fit.4) 
loo5 <- loo(fit.5)
loo6 <- loo(fit.6)

loo_comparison <- loo_compare(loo1, loo2, loo3, loo4, loo5, loo6)
print(loo_comparison)

summary(fit.6, waic = TRUE) 

# data for post estimation
new.data <- dm %>% mutate(survey_year_numeric2=12) %>% 
  droplevels()

post <- brms::posterior_epred(fit.6, newdata=new.data) 
posterior_predictions <- as.data.frame(t(post))

# Add country region GDP stratum_num psu_num wpopnr_sample variables to predictions 
posterior_predictions <- posterior_predictions %>% 
  bind_cols(new.data %>% select(w3_who_30_69, w3_crude_30_69, sample_dm_diag, age_cat4, age_cat8, dm_diag_factor, 
                                sex_factor, educat3_factor, country_factor, regioncat_factor, countryGDPclass_factor, 
                                stratum_num, psu_num, new_id,
                                age, bmi)) 
setDT(posterior_predictions)

# Append records with sample_dm_diag==0 ----
dm.0 <- Appended1[sample_dm_diag ==0];nrow(dm.0) # 376979
dm.0 <- dm.0[, .(new_id, sample_dm_diag, stratum_num, psu_num, w3_who_30_69, w3_crude_30_69)];nrow(dm.0) # 376979
dm.0 <- dm.0[, w3_who_30_69:=0]
dm.0 <- dm.0[, w3_crude_30_69:=0]

missing_cols <- setdiff(names(posterior_predictions), names(dm.0))
for (col in missing_cols) {
  col_class <- class(posterior_predictions[[col]])[1]
  
  # assign proper NA based on class
  na_value <- switch(col_class,
                     "factor" = factor(NA, levels = levels(posterior_predictions[[col]])),
                     "character" = NA_character_,
                     "integer" = NA_integer_,
                     "numeric" = NA_real_,
                     "logical" = NA,
                     NA)  # default fallback
  
  dm.0[, (col) := na_value]
}

setcolorder(dm.0, names(posterior_predictions))

combined.dm_diag <- rbind(posterior_predictions, dm.0, use.names = TRUE, fill = TRUE)
combined.dm_diag <- as.data.frame(combined.dm_diag)
combined.dm_diag <- combined.dm_diag %>% 
  mutate(bmi_cat_factor=factor(case_when(bmi<18.5 ~1,
                                         bmi>=18.5 & bmi<25 ~2,
                                         bmi>=25 & bmi<30 ~3,
                                         bmi>=30 ~4,
                                         TRUE ~NA_real_),
                               levels=1:4,
                               labels=c("<18.5", "18.5-<25", "25-<30", ">=30")))

# design for WHO standardization
design.dm_diag = svydesign(data = combined.dm_diag, strata = ~ stratum_num, ids = ~ psu_num, weights = ~ w3_who_30_69, nest = T)
saveRDS(design.dm_diag, file="design.dm_diag.WHO.rds")

# design for no WHO standardization
design.dm_diag = svydesign(data = combined.dm_diag, strata = ~ stratum_num, ids = ~ psu_num, weights = ~ w3_crude_30_69, nest = T)
saveRDS(design.dm_diag, file="design.dm_diag.CRUDE.rds")

## dm_control_diag----
dm <- Appended1[sample_dm_control_diag ==1];nrow(dm) # 20243
dm <- dm[!is.na(dm_control)];nrow(dm) #20151
dm <- dm[!is.na(w3_who_30_69)] ;nrow(dm) 
dm <- dm[!is.na(w3_crude_30_69)] ;nrow(dm) 
dm <- dm[!is.na(psu_num)] ;nrow(dm) 
dm <- dm[!is.na(stratum_num)];nrow(dm) 
table(dm$dm_control, exclude=NULL)

# further delete
dm <- dm[!(is.na(bmi_cat4)|is.na(educat3))] ;nrow(dm) 
dm <- dm[!is.na(age)] ;nrow(dm) 
dm <- dm[!is.na(sex)] ;nrow(dm) 

dm <- dm[, `:=`(country_factor=factor(country),
                sex_factor=factor(sex),
                educat3_factor=factor(educat3),
                countryGDPclass_factor=factor(countryGDPclass),
                regioncat_factor=factor(ncdrisc_regioncat),
                dm_control_factor=factor(dm_control))]
dm <- dm %>% 
  droplevels

save(dm, file="dm_control.diag.rds") 

# model----
knot_interior_age <- quantile(dm$age, probs = c(0.275, 0.5, 0.725))
boundary_knots_age <- quantile(dm$age, probs = c(0.05, 0.95))
knot_interior_bmi <- quantile(dm$bmi , probs = c(0.275, 0.5, 0.725))
boundary_knots_bmi <- quantile(dm$bmi , probs = c(0.05, 0.95))

age_spline <- ns(dm$age, knots = knot_interior_age, Boundary.knots = boundary_knots_age)
bmi_spline <- ns(dm$bmi, knots = knot_interior_bmi, Boundary.knots = boundary_knots_bmi)

colnames(age_spline) <- paste0("age_s", 1:ncol(age_spline))
colnames(bmi_spline) <- paste0("bmi_s", 1:ncol(bmi_spline))

dm[, paste0("age_s", 1:ncol(age_spline)) := as.data.table(age_spline)]
dm[, paste0("bmi_s", 1:ncol(bmi_spline)) := as.data.table(bmi_spline)]

save(dm, file="dm_control.diag.rds") 

start.t <- Sys.time()  

fit.1 <- brm(dm_control_factor ~ sex_factor+educat3_factor +age_cat4 + bmi_cat4 + countryGDPclass_factor +
                survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
              data = dm,
              family = bernoulli(link = "logit"),
              control = list(adapt_delta = 0.98, max_treedepth =15),
              iter = 1000, warmup = 500, chains = 4, cores = 4,
              seed=171511, backend = "cmdstanr")

fit.2 <- brm(dm_control_factor ~ sex_factor+educat3_factor +age_cat4 + bmi_cat4 + countryGDPclass_factor +
               survey_year_numeric2 +
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr")

fit.3 <- brm(dm_control_factor ~ sex_factor+educat3_factor +age_cat8 + bmi_cat6 + countryGDPclass_factor +
               survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr",
             save_pars = save_pars(all = TRUE))


fit.4 <- brm(dm_control_factor ~ sex_factor+age_cat4 + educat3_factor +
               bmi_cat4 + countryGDPclass_factor + survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               regioncat_factor + regioncat_factor*bmi_cat4 +
               (1 | country_factor) + (-1 + countryGDPclass_factor | country_factor),
             data = subset(dm, sample_dm_diag==1),
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr",
             save_pars = save_pars(all = TRUE))

fit.5 <- brm(dm_control_factor ~ sex_factor+educat3_factor +ns(age, df=4) + ns(bmi, df=4) + countryGDPclass_factor +
               survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr")

fit.6 <- brm(dm_control_factor ~ sex_factor+educat3_factor +  age_s1 + age_s2 + age_s3 + age_s4 +  # adjust depending on how many columns were created
               bmi_s1 + bmi_s2 + bmi_s3 + bmi_s4 + countryGDPclass_factor +
               survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr")

end.t <- Sys.time()
take.time <- end.t-start.t # 12.75191 hours

save(fit.1, file="fit1_dm_control.diag.rds")
save(fit.2, file="fit2_dm_control.diag.rds")
save(fit.3, file="fit3_dm_control.diag.rds")
save(fit.4, file="fit4_dm_control.diag.rds")
save(fit.5, file="fit5_dm_control.diag.rds")
save(fit.6, file="fit6_dm_control.diag.rds")

# test loo----
library(loo)

loo1 <- loo(fit.1)
loo2 <- loo(fit.2)
loo3 <- loo(fit.3)
loo4 <- loo(fit.4) 
loo5 <- loo(fit.5)
loo6 <- loo(fit.6)

loo_comparison <- loo_compare(loo1, loo2, loo3, loo4, loo5, loo6)
print(loo_comparison)

summary(fit.6, waic=TRUE)

# make data for post-estimation
new.data <- dm %>% mutate(survey_year_numeric2=12) %>% #11
  droplevels()

post <- brms::posterior_epred(fit.6, newdata=new.data) #matrix
posterior_predictions <- as.data.frame(t(post))

# Add country region GDP stratum_num psu_num wpopnr_sample variables to predictions 
posterior_predictions <- posterior_predictions %>% 
  bind_cols(new.data %>% select(w3_who_30_69, w3_crude_30_69, sample_dm_control_diag, age_cat4, age_cat8, dm_control_factor, 
                                sex_factor, educat3_factor, country_factor, regioncat_factor, countryGDPclass_factor, 
                                stratum_num, psu_num, new_id, age, bmi)) 
setDT(posterior_predictions)

# Append records with sample_dm_control_diag==0 ----
dm.0 <- Appended1[sample_dm_control_diag ==0];nrow(dm.0) # 381664
dm.0 <- dm.0[, .(new_id, sample_dm_control_diag, stratum_num, psu_num, w3_who_30_69, w3_crude_30_69)];nrow(dm.0) 
dm.0 <- dm.0[, w3_who_30_69:=0]
dm.0 <- dm.0[, w3_crude_30_69:=0]

missing_cols <- setdiff(names(posterior_predictions), names(dm.0))
for (col in missing_cols) {
  col_class <- class(posterior_predictions[[col]])[1]
  
  # assign proper NA based on class
  na_value <- switch(col_class,
                     "factor" = factor(NA, levels = levels(posterior_predictions[[col]])),
                     "character" = NA_character_,
                     "integer" = NA_integer_,
                     "numeric" = NA_real_,
                     "logical" = NA,
                     NA)  # default fallback
  
  dm.0[, (col) := na_value]
}

setcolorder(dm.0, names(posterior_predictions))

combined.dm_control_diag <- rbind(posterior_predictions, dm.0, use.names = TRUE, fill = TRUE)
combined.dm_control_diag <- as.data.frame(combined.dm_control_diag)
combined.dm_control_diag <- combined.dm_control_diag %>% 
  mutate(bmi_cat_factor=factor(case_when(bmi<18.5 ~1,
                                         bmi>=18.5 & bmi<25 ~2,
                                         bmi>=25 & bmi<30 ~3,
                                         bmi>=30 ~4,
                                         TRUE ~NA_real_),
                               levels=1:4,
                               labels=c("<18.5", "18.5-<25", "25-<30", ">=30")))

# design for WHO standardization
design.dm_control_diag = svydesign(data = combined.dm_control_diag, strata = ~ stratum_num, ids = ~ psu_num, weights = ~ w3_who_30_69, nest = T)
saveRDS(design.dm_control_diag, file="design.dm_control_diag.WHO.rds")

# design for no WHO standardization
design.dm_control_diag = svydesign(data = combined.dm_control_diag, strata = ~ stratum_num, ids = ~ psu_num, weights = ~ w3_crude_30_69, nest = T)
saveRDS(design.dm_control_diag, file="design.dm_control_diag.CRUDE.rds")

## bp_control_diag----
dm <- Appended1[sample_bp_control_diag ==1];nrow(dm) # 20243
dm <- dm[!is.na(bp_control)];nrow(dm) #19686
dm <- dm[!is.na(w3_who_30_69)] ;nrow(dm) 
dm <- dm[!is.na(w3_crude_30_69)] ;nrow(dm) 
dm <- dm[!is.na(psu_num)] ;nrow(dm) 
dm <- dm[!is.na(stratum_num)];nrow(dm) 
table(dm$bp_control, exclude=NULL)

# further delete
dm <- dm[!(is.na(bmi_cat4)|is.na(educat3))] ;nrow(dm) 
dm <- dm[!is.na(age)] ;nrow(dm) 
dm <- dm[!is.na(sex)] ;nrow(dm) 

dm <- dm[, `:=`(country_factor=factor(country),
                sex_factor=factor(sex),
                educat3_factor=factor(educat3),
                countryGDPclass_factor=factor(countryGDPclass),
                regioncat_factor=factor(ncdrisc_regioncat),
                bp_control_factor=factor(bp_control))]
dm <- dm %>% 
  droplevels

save(dm, file="bp_control.diag.rds") 

# model----
knot_interior_age <- quantile(dm$age, probs = c(0.275, 0.5, 0.725))
boundary_knots_age <- quantile(dm$age, probs = c(0.05, 0.95))
knot_interior_bmi <- quantile(dm$bmi , probs = c(0.275, 0.5, 0.725))
boundary_knots_bmi <- quantile(dm$bmi , probs = c(0.05, 0.95))

age_spline <- ns(dm$age, knots = knot_interior_age, Boundary.knots = boundary_knots_age)
bmi_spline <- ns(dm$bmi, knots = knot_interior_bmi, Boundary.knots = boundary_knots_bmi)

colnames(age_spline) <- paste0("age_s", 1:ncol(age_spline))
colnames(bmi_spline) <- paste0("bmi_s", 1:ncol(bmi_spline))

dm[, paste0("age_s", 1:ncol(age_spline)) := as.data.table(age_spline)]
dm[, paste0("bmi_s", 1:ncol(bmi_spline)) := as.data.table(bmi_spline)]

save(dm, file="bp_control.diag.rds") 
start.t <- Sys.time()  

fit.1 <- brm(bp_control_factor ~ sex_factor+educat3_factor +age_cat4 + bmi_cat4 + countryGDPclass_factor +
               survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr")

fit.2 <- brm(bp_control_factor ~ sex_factor+educat3_factor +age_cat4 + bmi_cat4 + countryGDPclass_factor +
               survey_year_numeric2 +
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr")

fit.3 <- brm(bp_control_factor ~ sex_factor+educat3_factor +age_cat8 + bmi_cat6 + countryGDPclass_factor +
               survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr",
             save_pars = save_pars(all = TRUE))

fit.4 <- brm(bp_control_factor ~ sex_factor+age_cat4 + educat3_factor +
               bmi_cat4 + countryGDPclass_factor + survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               regioncat_factor + regioncat_factor*bmi_cat4 +
               (1 | country_factor) + (-1 + countryGDPclass_factor | country_factor),
             data = subset(dm, sample_dm_diag==1),
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr",
             save_pars = save_pars(all = TRUE))

fit.5 <- brm(bp_control_factor ~ sex_factor+educat3_factor +ns(age, df=4) + ns(bmi, df=4) + countryGDPclass_factor +
               survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr")

fit.6 <- brm(bp_control_factor ~ sex_factor+educat3_factor +  age_s1 + age_s2 + age_s3 + age_s4 +  # adjust depending on how many columns were created
               bmi_s1 + bmi_s2 + bmi_s3 + bmi_s4 + countryGDPclass_factor +
               survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr")

end.t <- Sys.time()
take.time <- end.t-start.t 

save(fit.1, file="fit1_bp_control.diag.rds")
save(fit.2, file="fit2_bp_control.diag.rds")
save(fit.3, file="fit3_bp_control.diag.rds")
save(fit.4, file="fit4_bp_control.diag.rds")
save(fit.5, file="fit5_bp_control.diag.rds")
save(fit.6, file="fit6_bp_control.diag.rds")
# test loo----
library(loo)

loo1 <- loo(fit.1)
loo2 <- loo(fit.2)
loo3 <- loo(fit.3)
loo4 <- loo(fit.4) 
loo5 <- loo(fit.5)
loo6 <- loo(fit.6)

loo_comparison <- loo_compare(loo1, loo2, loo3, loo4, loo5, loo6)

print(loo_comparison)

summary(fit.6, waic=TRUE)

# make data for post-estimation
new.data <- dm %>% mutate(survey_year_numeric2=12) %>% #11
  droplevels()

post <- brms::posterior_epred(fit.6, newdata=new.data) 
posterior_predictions <- as.data.frame(t(post))


# Add country region GDP stratum_num psu_num wpopnr_sample variables to predictions 
posterior_predictions <- posterior_predictions %>% 
  bind_cols(new.data %>% select(w3_who_30_69, w3_crude_30_69, sample_bp_control_diag, age_cat4, age_cat8, bp_control_factor, 
                                sex_factor, educat3_factor, country_factor, regioncat_factor, countryGDPclass_factor, 
                                stratum_num, psu_num, new_id, age, bmi)) 
setDT(posterior_predictions)

# Append records with sample_bp_control_diag==0 ----
dm.0 <- Appended1[sample_bp_control_diag ==0];nrow(dm.0) # 381664
dm.0 <- dm.0[, .(new_id, sample_bp_control_diag, stratum_num, psu_num, w3_who_30_69, w3_crude_30_69)];nrow(dm.0) 
dm.0 <- dm.0[, w3_who_30_69:=0]
dm.0 <- dm.0[, w3_crude_30_69:=0]

missing_cols <- setdiff(names(posterior_predictions), names(dm.0))
for (col in missing_cols) {
  col_class <- class(posterior_predictions[[col]])[1]
  
  # assign proper NA based on class
  na_value <- switch(col_class,
                     "factor" = factor(NA, levels = levels(posterior_predictions[[col]])),
                     "character" = NA_character_,
                     "integer" = NA_integer_,
                     "numeric" = NA_real_,
                     "logical" = NA,
                     NA)  # default fallback
  
  dm.0[, (col) := na_value]
}

setcolorder(dm.0, names(posterior_predictions))

combined.bp_control_diag <- rbind(posterior_predictions, dm.0, use.names = TRUE, fill = TRUE)
combined.bp_control_diag <- as.data.frame(combined.bp_control_diag)
combined.bp_control_diag <- combined.bp_control_diag %>% 
  mutate(bmi_cat_factor=factor(case_when(bmi<18.5 ~1,
                                         bmi>=18.5 & bmi<25 ~2,
                                         bmi>=25 & bmi<30 ~3,
                                         bmi>=30 ~4,
                                         TRUE ~NA_real_),
                               levels=1:4,
                               labels=c("<18.5", "18.5-<25", "25-<30", ">=30")))

# design for WHO standardization
design.bp_control_diag = svydesign(data = combined.bp_control_diag, strata = ~ stratum_num, ids = ~ psu_num, weights = ~ w3_who_30_69, nest = T)
saveRDS(design.bp_control_diag, file="design.bp_control_diag.WHO.rds")

# design for no WHO standardization
design.bp_control_diag = svydesign(data = combined.bp_control_diag, strata = ~ stratum_num, ids = ~ psu_num, weights = ~ w3_crude_30_69, nest = T)
saveRDS(design.bp_control_diag, file="design.bp_control_diag.CRUDE.rds")

## statin_dm_diag----
dm.0 <- Appended1[sample_statin_dm_diag ==0];nrow(dm.0) # 382993
dm <- Appended1[sample_statin_dm_diag ==1];nrow(dm) # 18914
dm <- dm[!is.na(statin_dm)];nrow(dm) #16424
dm <- dm[!is.na(w3_who_40_69)] ;nrow(dm) 
dm <- dm[!is.na(w3_crude_40_69)] ;nrow(dm) 
dm <- dm[!is.na(psu_num)] ;nrow(dm) 
dm <- dm[!is.na(stratum_num)];nrow(dm) 
table(dm$statin_dm, exclude=NULL)

# further delete
dm <- dm[!(is.na(bmi_cat4)|is.na(educat3))] ;nrow(dm) 
dm <- dm[!is.na(age)] ;nrow(dm) 
dm <- dm[!is.na(sex)] ;nrow(dm) 

dm <- dm[, `:=`(country_factor=factor(country),
                sex_factor=factor(sex),
                educat3_factor=factor(educat3),
                countryGDPclass_factor=factor(countryGDPclass),
                regioncat_factor=factor(ncdrisc_regioncat),
                statin_dm_factor=factor(statin_dm))]
dm <- dm %>% 
  droplevels

save(dm, file="statin_dm_diag.rds") 

# model----
knot_interior_age <- quantile(dm$age, probs = c(0.275, 0.5, 0.725))
boundary_knots_age <- quantile(dm$age, probs = c(0.05, 0.95))
knot_interior_bmi <- quantile(dm$bmi , probs = c(0.275, 0.5, 0.725))
boundary_knots_bmi <- quantile(dm$bmi , probs = c(0.05, 0.95))

age_spline <- ns(dm$age, knots = knot_interior_age, Boundary.knots = boundary_knots_age)
bmi_spline <- ns(dm$bmi, knots = knot_interior_bmi, Boundary.knots = boundary_knots_bmi)

colnames(age_spline) <- paste0("age_s", 1:ncol(age_spline))
colnames(bmi_spline) <- paste0("bmi_s", 1:ncol(bmi_spline))

dm[, paste0("age_s", 1:ncol(age_spline)) := as.data.table(age_spline)]
dm[, paste0("bmi_s", 1:ncol(bmi_spline)) := as.data.table(bmi_spline)]

save(dm, file="statin_dm_diag.rds") 

start.t <- Sys.time()  

fit.1 <- brm(statin_dm_factor ~ sex_factor+educat3_factor +age_cat3 + bmi_cat4 + countryGDPclass_factor +
               survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr")

fit.2 <- brm(statin_dm_factor ~ sex_factor+educat3_factor +age_cat3 + bmi_cat4 + countryGDPclass_factor +
               survey_year_numeric2 +
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr")

fit.3 <- brm(statin_dm_factor ~ sex_factor+educat3_factor +age_cat6 + bmi_cat6 + countryGDPclass_factor +
               survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr",
             save_pars = save_pars(all = TRUE))

fit.4 <- brm(statin_dm_factor ~ sex_factor+age_cat3 + educat3_factor +
               bmi_cat4 + countryGDPclass_factor + survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               regioncat_factor + regioncat_factor*bmi_cat4 +
               (1 | country_factor) + (-1 + countryGDPclass_factor | country_factor),
             data = subset(dm, sample_dm_diag==1),
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr",
             save_pars = save_pars(all = TRUE))

fit.5 <- brm(statin_dm_factor ~ sex_factor+educat3_factor +ns(age, df=4) + ns(bmi, df=4) + countryGDPclass_factor +
               survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr")

fit.6 <- brm(statin_dm_factor ~ sex_factor+educat3_factor +  age_s1 + age_s2 + age_s3 + age_s4 +  # adjust depending on how many columns were created
               bmi_s1 + bmi_s2 + bmi_s3 + bmi_s4 + countryGDPclass_factor +
               survey_year_numeric2 + I(survey_year_numeric2^2) + # quadratic term added
               (-1 + countryGDPclass_factor | country_factor) + (1 | regioncat_factor / country_factor),
             data = dm,
             family = bernoulli(link = "logit"),
             control = list(adapt_delta = 0.98, max_treedepth =15),
             iter = 1000, warmup = 500, chains = 4, cores = 4,
             seed=171511, backend = "cmdstanr")

end.t <- Sys.time()
take.time <- end.t-start.t 

save(fit.1, file="fit1_statin_dm.diag.rds")
save(fit.2, file="fit2_statin_dm.diag.rds")
save(fit.3, file="fit3_statin_dm.diag.rds")
save(fit.4, file="fit4_statin_dm.diag.rds")
save(fit.5, file="fit5_statin_dm.diag.rds")
save(fit.6, file="fit6_statin_dm.diag.rds")
# test loo----
library(loo)

loo1 <- loo(fit.1)
loo2 <- loo(fit.2)
loo3 <- loo(fit.3)
loo4 <- loo(fit.4) 
loo5 <- loo(fit.5)
loo6 <- loo(fit.6)

loo_comparison <- loo_compare(loo1, loo2, loo3, loo4, loo5, loo6)

print(loo_comparison)

summary(fit.6, waic=TRUE)

# make data for post-estimation
new.data <- dm %>% mutate(survey_year_numeric2=12) %>% 
  droplevels()

post <- brms::posterior_epred(fit.6, newdata=new.data) 
posterior_predictions <- as.data.frame(t(post))

# Add country region GDP stratum_num psu_num wpopnr_sample variables to predictions 
posterior_predictions <- posterior_predictions %>% 
  bind_cols(new.data %>% select(w3_who_40_69, w3_crude_40_69, sample_statin_dm_diag, age_cat3, age_cat6, statin_dm_factor, 
                                sex_factor, educat3_factor, country_factor, regioncat_factor, countryGDPclass_factor, 
                                stratum_num, psu_num, new_id, age, bmi)) 
setDT(posterior_predictions)

# Append records with sample_statin_dm_diag==0 ----
dm.0 <- Appended1[sample_statin_dm_diag ==0];nrow(dm.0) # 382993
dm.0 <- dm.0[, .(new_id, sample_statin_dm_diag, stratum_num, psu_num, w3_who_40_69, w3_crude_40_69)];nrow(dm.0) 
dm.0 <- dm.0[, w3_who_40_69:=0]
dm.0 <- dm.0[, w3_crude_40_69:=0]

missing_cols <- setdiff(names(posterior_predictions), names(dm.0))
for (col in missing_cols) {
  col_class <- class(posterior_predictions[[col]])[1]
  
  # assign proper NA based on class
  na_value <- switch(col_class,
                     "factor" = factor(NA, levels = levels(posterior_predictions[[col]])),
                     "character" = NA_character_,
                     "integer" = NA_integer_,
                     "numeric" = NA_real_,
                     "logical" = NA,
                     NA)  # default fallback
  
  dm.0[, (col) := na_value]
}

setcolorder(dm.0, names(posterior_predictions))

combined.statin_dm_diag <- rbind(posterior_predictions, dm.0, use.names = TRUE, fill = TRUE)
combined.statin_dm_diag <- as.data.frame(combined.statin_dm_diag)
combined.statin_dm_diag <- combined.statin_dm_diag %>% 
  mutate(bmi_cat_factor=factor(case_when(bmi<18.5 ~1,
                                         bmi>=18.5 & bmi<25 ~2,
                                         bmi>=25 & bmi<30 ~3,
                                         bmi>=30 ~4,
                                         TRUE ~NA_real_),
                               levels=1:4,
                               labels=c("<18.5", "18.5-<25", "25-<30", ">=30")))

# design for WHO standardization
design.statin_dm_diag = svydesign(data = combined.statin_dm_diag, strata = ~ stratum_num, ids = ~ psu_num, weights = ~ w3_who_40_69, nest = T)
saveRDS(design.statin_dm_diag, file="design.statin_dm_diag.WHO.rds")

# design for no WHO standardization
design.statin_dm_diag = svydesign(data = combined.statin_dm_diag, strata = ~ stratum_num, ids = ~ psu_num, weights = ~ w3_crude_40_69, nest = T)
saveRDS(design.statin_dm_diag, file="design.statin_dm_diag.CRUDE.rds")